library(gridExtra)
library(knitr)
library(ggplot2)
library(fastqcr)
# Install the FastQC tool. This call is only required when FastQC is not
# already installed locally; once it is, this line can be commented out.
fastqcr::fastqc_install()
### USER CONFIGURATION: THIS IS THE ONLY CHUNK THAT REQUIRES MODIFICATION ###
### Assign your directory locations here:
# Full path to the directory containing one .fastq.gz file per sample
fq.dir <- "/home/a619f280/work/phil.stachyris/fastq"
# Full path to the output directory where you want the FastQC results written
qc.dir <- "/home/a619f280/work/phil.stachyris/qc"
# Run FastQC, from within R, on every .fastq.gz file found in fq.dir.
# This step only needs to run once: when you are only tweaking the downstream
# visualizations, comment this call out.
fastqc(
  fq.dir = fq.dir,  # directory holding the FASTQ files
  qc.dir = qc.dir,  # directory that receives the results
  threads = 4       # number of threads
)
# List the contents of the output directory to confirm that fastqc worked
list.files(path = qc.dir)
[1] “fastqcr.Rmd”
[2] “run.fastqcr.sh”
[3] “S_capitalis_28326_fastqc.html”
[4] “S_capitalis_28326_fastqc.zip”
[5] “S_capitalis_28338_fastqc.html”
[6] “S_capitalis_28338_fastqc.zip”
[7] “S_capitalis_28339_fastqc.html”
[8] “S_capitalis_28339_fastqc.zip”
[9] “S_capitalis_28341_fastqc.html”
[10] “S_capitalis_28341_fastqc.zip”
[11] “S_capitalis_28342_fastqc.html”
[12] “S_capitalis_28342_fastqc.zip”
[13] “S_capitalis_29959_fastqc.html”
[14] “S_capitalis_29959_fastqc.zip”
[15] “S_capitalis_29965_fastqc.html”
[16] “S_capitalis_29965_fastqc.zip”
[17] “S_capitalis_29968_fastqc.html”
[18] “S_capitalis_29968_fastqc.zip”
[19] “S_capitalis_CMNH37769_fastqc.html”
[20] “S_capitalis_CMNH37769_fastqc.zip”
[21] “S_dennistouni_19648_fastqc.html”
[22] “S_dennistouni_19648_fastqc.zip”
[23] “S_dennistouni_19656_fastqc.html”
[24] “S_dennistouni_19656_fastqc.zip”
[25] “S_dennistouni_20186_fastqc.html”
[26] “S_dennistouni_20186_fastqc.zip”
[27] “S_dennistouni_20187_fastqc.html”
[28] “S_dennistouni_20187_fastqc.zip”
[29] “S_dennistouni_20188_fastqc.html”
[30] “S_dennistouni_20188_fastqc.zip”
[31] “S_dennistouni_20191_fastqc.html”
[32] “S_dennistouni_20191_fastqc.zip”
[33] “S_dennistouni_20201_fastqc.html”
[34] “S_dennistouni_20201_fastqc.zip”
[35] “S_dennistouni_20222_fastqc.html”
[36] “S_dennistouni_20222_fastqc.zip”
[37] “S_dennistouni_20224_fastqc.html”
[38] “S_dennistouni_20224_fastqc.zip”
[39] “S_dennistouni_20225_fastqc.html”
[40] “S_dennistouni_20225_fastqc.zip”
[41] “S_dennistouni_20229_fastqc.html”
[42] “S_dennistouni_20229_fastqc.zip”
[43] “S_dennistouni_20234_fastqc.html”
[44] “S_dennistouni_20234_fastqc.zip”
[45] “S_dennistouni_20335_fastqc.html”
[46] “S_dennistouni_20335_fastqc.zip”
[47] “S_dennistouni_21084_fastqc.html”
[48] “S_dennistouni_21084_fastqc.zip”
[49] “S_dennistouni_21086_fastqc.html”
[50] “S_dennistouni_21086_fastqc.zip”
[51] “S_dennistouni_21112_fastqc.html”
[52] “S_dennistouni_21112_fastqc.zip”
[53] “S_dennistouni_25696_fastqc.html”
[54] “S_dennistouni_25696_fastqc.zip”
[55] “S_dennistouni_25702_fastqc.html”
[56] “S_dennistouni_25702_fastqc.zip”
[57] “S_dennistouni_25703_fastqc.html”
[58] “S_dennistouni_25703_fastqc.zip”
[59] “S_dennistouni_25713_fastqc.html”
[60] “S_dennistouni_25713_fastqc.zip”
[61] “S_dennistouni_25716_fastqc.html”
[62] “S_dennistouni_25716_fastqc.zip”
[63] “S_dennistouni_25743_fastqc.html”
[64] “S_dennistouni_25743_fastqc.zip”
[65] “S_dennistouni_25817_fastqc.html”
[66] “S_dennistouni_25817_fastqc.zip”
[67] “S_dennistouni_25828_fastqc.html”
[68] “S_dennistouni_25828_fastqc.zip”
[69] “S_dennistouni_25829_fastqc.html”
[70] “S_dennistouni_25829_fastqc.zip”
[71] “S_dennistouni_25846_fastqc.html”
[72] “S_dennistouni_25846_fastqc.zip”
[73] “S_dennistouni_25885_fastqc.html”
[74] “S_dennistouni_25885_fastqc.zip”
[75] “S_dennistouni_25898_fastqc.html”
[76] “S_dennistouni_25898_fastqc.zip”
[77] “S_dennistouni_25903_fastqc.html”
[78] “S_dennistouni_25903_fastqc.zip”
[79] “S_dennistouni_25908_fastqc.html”
[80] “S_dennistouni_25908_fastqc.zip”
[81] “S_dennistouni_25939_fastqc.html”
[82] “S_dennistouni_25939_fastqc.zip”
[83] “S_dennistouni_25950_fastqc.html”
[84] “S_dennistouni_25950_fastqc.zip”
[85] “S_dennistouni_26573_fastqc.html”
[86] “S_dennistouni_26573_fastqc.zip”
[87] “S_dennistouni_26579_fastqc.html”
[88] “S_dennistouni_26579_fastqc.zip”
[89] “S_dennistouni_26961_fastqc.html”
[90] “S_dennistouni_26961_fastqc.zip”
[91] “S_dennistouni_CMNH38201_fastqc.html”
[92] “S_dennistouni_CMNH38201_fastqc.zip”
[93] “S_nigrocapitata_14192_fastqc.html”
[94] “S_nigrocapitata_14192_fastqc.zip”
[95] “S_nigrocapitata_14199_fastqc.html”
[96] “S_nigrocapitata_14199_fastqc.zip”
[97] “S_nigrocapitata_18034_fastqc.html”
[98] “S_nigrocapitata_18034_fastqc.zip”
[99] “S_nigrocapitata_18040_fastqc.html”
[100] “S_nigrocapitata_18040_fastqc.zip”
[101] “S_nigrocapitata_18083_fastqc.html”
[102] “S_nigrocapitata_18083_fastqc.zip”
[103] “S_nigrocapitata_25550_fastqc.html”
[104] “S_nigrocapitata_25550_fastqc.zip”
[105] “S_nigrocapitata_25551_fastqc.html”
[106] “S_nigrocapitata_25551_fastqc.zip”
[107] “S_nigrocapitata_28214_fastqc.html”
[108] “S_nigrocapitata_28214_fastqc.zip”
[109] “S_nigrocapitata_28215_fastqc.html”
[110] “S_nigrocapitata_28215_fastqc.zip”
[111] “S_nigrocapitata_33030_fastqc.html”
[112] “S_nigrocapitata_33030_fastqc.zip”
[113] “S_nigrocapitata_33060_fastqc.html”
[114] “S_nigrocapitata_33060_fastqc.zip”
[115] “S_nigrocapitata_FMNH472765_fastqc.html”
[116] “S_nigrocapitata_FMNH472765_fastqc.zip”
[117] “S_plateni_19056_fastqc.html”
[118] “S_plateni_19056_fastqc.zip”
[119] “S_plateni_28305_fastqc.html”
[120] “S_plateni_28305_fastqc.zip”
[121] “S_plateni_28350_fastqc.html”
[122] “S_plateni_28350_fastqc.zip”
[123] “S_whiteheadi_18001_fastqc.html”
[124] “S_whiteheadi_18001_fastqc.zip”
[125] “S_whiteheadi_20988_fastqc.html”
[126] “S_whiteheadi_20988_fastqc.zip”
[127] “slurm-36787160.out”
[128] “slurm-36787164.out”
[129] “slurm-36787165.out”
[130] “slurm-36807798.out”
[131] "Stach__FMNH449754_fastqc.html"
[132] "Stach__FMNH449754_fastqc.zip"
[133] "Stach__FMNH449756_fastqc.html"
[134] "Stach__FMNH449756_fastqc.zip"
# Create a character vector where each value is the full path to the .zip
# archive created by fastqc() for a given sample.
# `pattern` is a regular expression, not a shell glob: escape the dot and
# anchor the extension so that only file names ending in ".zip" are selected.
samps <- list.files(qc.dir, full.names = TRUE, pattern = "\\.zip$")

# Print the basic statistics table and draw the QC plots for each sample
for (i in samps) {
  # Read the QC info for this sample from the .zip generated in the previous step
  samp.info <- qc_read(i)

  # Gather the QC outputs for this sample. The list is rebuilt from scratch on
  # every iteration, and it is deliberately not called `plot` so that it does
  # not mask base::plot().
  qc.plots <- list(
    qc_plot(samp.info, "Basic statistics"),
    qc_plot(samp.info, "Per sequence quality scores"),
    qc_plot(samp.info, "Sequence duplication levels")
  )

  # Show the basic statistics table, headed by the archive's file name
  print(paste0("QC results for sample ", basename(i)))
  cat("\n")
  print(kable(qc.plots[[1]]))
  cat("\n")

  # Show the quality-score and duplication-level plots side by side
  grid.arrange(qc.plots[[2]], qc.plots[[3]], ncol = 2)
}
[1] “QC results for sample S_capitalis_28326_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_28326.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2271883 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_28338_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_28338.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 7353870 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_28339_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_28339.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 4011414 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_28341_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_28341.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1289015 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_28342_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_28342.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2393217 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_29959_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_29959.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2212314 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_29965_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_29965.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1115542 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_29968_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_29968.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 7389825 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_capitalis_CMNH37769_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_capitalis_CMNH37769.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3199634 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_19648_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_19648.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 617100 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_19656_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_19656.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 8983624 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20186_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20186.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3219997 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20187_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20187.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3879876 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20188_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20188.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3825147 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20191_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20191.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3882279 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20201_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20201.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 215019 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20222_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20222.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 7410303 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20224_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20224.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 4820610 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20225_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20225.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 229061 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_20229_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20229.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2196690 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_20234_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20234.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 67716 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_20335_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_20335.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 354359 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_21084_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_21084.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2795688 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_21086_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_21086.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 14622 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_21112_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_21112.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3352183 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25696_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25696.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3189487 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25702_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25702.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 4377719 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25703_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25703.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 4087648 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25713_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25713.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 14815 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_25716_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25716.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2043166 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_25743_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25743.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 404420 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25817_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25817.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3473626 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25828_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25828.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 558011 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25829_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25829.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 16862 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_25846_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25846.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 290010 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25885_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25885.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1345393 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25898_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25898.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1077889 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_25903_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25903.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2987903 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25908_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25908.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1829975 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_25939_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25939.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2193303 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_25950_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_25950.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 26200 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_dennistouni_26573_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_26573.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2038928 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_26579_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_26579.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2338750 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_26961_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_26961.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1899146 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_dennistouni_CMNH38201_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_dennistouni_CMNH38201.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2945 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_nigrocapitata_14192_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_14192.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 507426 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_14199_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_14199.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1323513 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_18034_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_18034.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2180339 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_18040_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_18040.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 5774993 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_18083_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_18083.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 3303412 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_25550_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_25550.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 193983 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_25551_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_25551.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 717 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_nigrocapitata_28214_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_28214.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 9305 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] “QC results for sample S_nigrocapitata_28215_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_28215.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 102531 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_33030_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_33030.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1031722 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_33060_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_33060.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 322869 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_nigrocapitata_FMNH472765_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_nigrocapitata_FMNH472765.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 4399 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_plateni_19056_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_plateni_19056.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1818956 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_plateni_28305_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_plateni_28305.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 1826744 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_plateni_28350_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_plateni_28350.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 944520 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_whiteheadi_18001_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_whiteheadi_18001.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 2546312 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 37 |
[1] “QC results for sample S_whiteheadi_20988_fastqc.zip”
| Measure | Value |
|---|---|
| Filename | S_whiteheadi_20988.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 83409 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] "QC results for sample Stach__FMNH449754_fastqc.zip"
| Measure | Value |
|---|---|
| Filename | Stach__FMNH449754.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 31563 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
[1] "QC results for sample Stach__FMNH449756_fastqc.zip"
| Measure | Value |
|---|---|
| Filename | Stach__FMNH449756.fq.gz |
| File type | Conventional base calls |
| Encoding | Sanger / Illumina 1.9 |
| Total Sequences | 13154 |
| Sequences flagged as poor quality | 0 |
| Sequence length | 95 |
| %GC | 36 |
# Aggregate the reports by pointing qc_aggregate() at the folder holding the
# output of fastqc(). Spell out FALSE: the shorthand `F` is an ordinary
# variable that can be reassigned.
qc <- qc_aggregate(qc.dir, progressbar = FALSE)
# Per-sample summary statistics, rendered as a table
knitr::kable(qc_stats(qc))
| sample | pct.dup | pct.gc | tot.seq | seq.length |
|---|---|---|---|---|
| S_capitalis_28326.fq.gz | 94.82 | 37 | 2271883 | 95 |
| S_capitalis_28338.fq.gz | 96.69 | 37 | 7353870 | 95 |
| S_capitalis_28339.fq.gz | 96.02 | 37 | 4011414 | 95 |
| S_capitalis_28341.fq.gz | 95.03 | 37 | 1289015 | 95 |
| S_capitalis_28342.fq.gz | 95.86 | 37 | 2393217 | 95 |
| S_capitalis_29959.fq.gz | 95.63 | 37 | 2212314 | 95 |
| S_capitalis_29965.fq.gz | 94.54 | 37 | 1115542 | 95 |
| S_capitalis_29968.fq.gz | 96.98 | 37 | 7389825 | 95 |
| S_capitalis_CMNH37769.fq.gz | 96.08 | 37 | 3199634 | 95 |
| S_dennistouni_19648.fq.gz | 93.81 | 37 | 617100 | 95 |
| S_dennistouni_19656.fq.gz | 97.04 | 37 | 8983624 | 95 |
| S_dennistouni_20186.fq.gz | 95.70 | 37 | 3219997 | 95 |
| S_dennistouni_20187.fq.gz | 96.16 | 37 | 3879876 | 95 |
| S_dennistouni_20188.fq.gz | 95.83 | 37 | 3825147 | 95 |
| S_dennistouni_20191.fq.gz | 96.01 | 37 | 3882279 | 95 |
| S_dennistouni_20201.fq.gz | 90.02 | 37 | 215019 | 95 |
| S_dennistouni_20222.fq.gz | 96.82 | 37 | 7410303 | 95 |
| S_dennistouni_20224.fq.gz | 96.57 | 37 | 4820610 | 95 |
| S_dennistouni_20225.fq.gz | 89.02 | 36 | 229061 | 95 |
| S_dennistouni_20229.fq.gz | 96.12 | 37 | 2196690 | 95 |
| S_dennistouni_20234.fq.gz | 81.53 | 36 | 67716 | 95 |
| S_dennistouni_20335.fq.gz | 92.02 | 37 | 354359 | 95 |
| S_dennistouni_21084.fq.gz | 96.33 | 37 | 2795688 | 95 |
| S_dennistouni_21086.fq.gz | 69.61 | 37 | 14622 | 95 |
| S_dennistouni_21112.fq.gz | 95.58 | 37 | 3352183 | 95 |
| S_dennistouni_25696.fq.gz | 95.62 | 37 | 3189487 | 95 |
| S_dennistouni_25702.fq.gz | 96.20 | 37 | 4377719 | 95 |
| S_dennistouni_25703.fq.gz | 96.61 | 37 | 4087648 | 95 |
| S_dennistouni_25713.fq.gz | 54.52 | 36 | 14815 | 95 |
| S_dennistouni_25716.fq.gz | 95.89 | 36 | 2043166 | 95 |
| S_dennistouni_25743.fq.gz | 92.25 | 37 | 404420 | 95 |
| S_dennistouni_25817.fq.gz | 95.53 | 37 | 3473626 | 95 |
| S_dennistouni_25828.fq.gz | 93.21 | 37 | 558011 | 95 |
| S_dennistouni_25829.fq.gz | 75.00 | 36 | 16862 | 95 |
| S_dennistouni_25846.fq.gz | 90.57 | 37 | 290010 | 95 |
| S_dennistouni_25885.fq.gz | 93.69 | 37 | 1345393 | 95 |
| S_dennistouni_25898.fq.gz | 94.96 | 36 | 1077889 | 95 |
| S_dennistouni_25903.fq.gz | 96.20 | 37 | 2987903 | 95 |
| S_dennistouni_25908.fq.gz | 95.65 | 36 | 1829975 | 95 |
| S_dennistouni_25939.fq.gz | 95.28 | 37 | 2193303 | 95 |
| S_dennistouni_25950.fq.gz | 65.93 | 36 | 26200 | 95 |
| S_dennistouni_26573.fq.gz | 95.68 | 37 | 2038928 | 95 |
| S_dennistouni_26579.fq.gz | 95.24 | 37 | 2338750 | 95 |
| S_dennistouni_26961.fq.gz | 95.02 | 37 | 1899146 | 95 |
| S_dennistouni_CMNH38201.fq.gz | 25.91 | 36 | 2945 | 95 |
| S_nigrocapitata_14192.fq.gz | 92.51 | 37 | 507426 | 95 |
| S_nigrocapitata_14199.fq.gz | 93.96 | 37 | 1323513 | 95 |
| S_nigrocapitata_18034.fq.gz | 95.47 | 37 | 2180339 | 95 |
| S_nigrocapitata_18040.fq.gz | 96.62 | 37 | 5774993 | 95 |
| S_nigrocapitata_18083.fq.gz | 96.28 | 37 | 3303412 | 95 |
| S_nigrocapitata_25550.fq.gz | 89.71 | 37 | 193983 | 95 |
| S_nigrocapitata_25551.fq.gz | 66.95 | 36 | 717 | 95 |
| S_nigrocapitata_28214.fq.gz | 75.83 | 36 | 9305 | 95 |
| S_nigrocapitata_28215.fq.gz | 83.63 | 37 | 102531 | 95 |
| S_nigrocapitata_33030.fq.gz | 94.38 | 37 | 1031722 | 95 |
| S_nigrocapitata_33060.fq.gz | 90.97 | 37 | 322869 | 95 |
| S_nigrocapitata_FMNH472765.fq.gz | 53.81 | 37 | 4399 | 95 |
| S_plateni_19056.fq.gz | 95.50 | 37 | 1818956 | 95 |
| S_plateni_28305.fq.gz | 95.89 | 37 | 1826744 | 95 |
| S_plateni_28350.fq.gz | 90.48 | 37 | 944520 | 95 |
| S_whiteheadi_18001.fq.gz | 95.83 | 37 | 2546312 | 95 |
| S_whiteheadi_20988.fq.gz | 70.18 | 36 | 83409 | 95 |
| Stach__FMNH449754.fq.gz | 69.15 | 36 | 31563 | 95 |
| Stach__FMNH449756.fq.gz | 70.15 | 36 | 13154 | 95 |
# Save the per-sample stats as an object
stats.info <- qc_stats(qc)
# Make tot.seq numeric so it can be binned and summarised
stats.info$tot.seq <- as.numeric(stats.info$tot.seq)

# Median number of reads per sample and the 10%-of-median cutoff, computed
# once and reused by the plots and the message below
median.reads <- median(stats.info$tot.seq)
low.read.cutoff <- median.reads * .1

# Histogram of the number of sequencing reads per sample.
#   stats: data frame with a numeric tot.seq column
#   bins:  number of histogram bins
# solid red line  = median sample value
# dashed red line = 10% of median sample value
# The reference lines are constants, so they are passed as plain values
# (not through aes()) and the dashed line is requested by name.
plot_read_histogram <- function(stats, bins) {
  stats.median <- median(stats$tot.seq)
  ggplot(stats, aes(x = tot.seq)) +
    geom_histogram(color = "black", fill = "white", bins = bins) +
    geom_vline(xintercept = stats.median, color = "red") +
    geom_vline(xintercept = stats.median * .1, color = "red",
               linetype = "dashed") +
    theme_classic() +
    xlab("Number of sequencing reads")
}

# Coarse view of the read-count distribution
plot_read_histogram(stats.info, bins = 20)
# Finer view of the same distribution
plot_read_histogram(stats.info, bins = 200)

# Report the median and the cutoff used to flag samples with fewer than 10% of
# the median number of reads (these should be dropped immediately)
print(paste("Median sample contains", median.reads, "reads. The following samples contain less than", low.read.cutoff, "reads (10% of the median), and should likely be dropped"))
[1] “Median sample contains 1828359.5 reads. The following samples contain less than 182835.95 reads (10% of the median), and should likely be dropped”
# Tabulate the samples whose read count falls below 10% of the median
read.cutoff <- median(stats.info$tot.seq) * .1
low.read.samps <- stats.info[stats.info$tot.seq < read.cutoff, ]
knitr::kable(low.read.samps)
| sample | pct.dup | pct.gc | tot.seq | seq.length |
|---|---|---|---|---|
| S_dennistouni_20234.fq.gz | 81.53 | 36 | 67716 | 95 |
| S_dennistouni_21086.fq.gz | 69.61 | 37 | 14622 | 95 |
| S_dennistouni_25713.fq.gz | 54.52 | 36 | 14815 | 95 |
| S_dennistouni_25829.fq.gz | 75.00 | 36 | 16862 | 95 |
| S_dennistouni_25950.fq.gz | 65.93 | 36 | 26200 | 95 |
| S_dennistouni_CMNH38201.fq.gz | 25.91 | 36 | 2945 | 95 |
| S_nigrocapitata_25551.fq.gz | 66.95 | 36 | 717 | 95 |
| S_nigrocapitata_28214.fq.gz | 75.83 | 36 | 9305 | 95 |
| S_nigrocapitata_28215.fq.gz | 83.63 | 37 | 102531 | 95 |
| S_nigrocapitata_FMNH472765.fq.gz | 53.81 | 37 | 4399 | 95 |
| S_whiteheadi_20988.fq.gz | 70.18 | 36 | 83409 | 95 |
| Stach__FMNH449754.fq.gz | 69.15 | 36 | 31563 | 95 |
| Stach__FMNH449756.fq.gz | 70.15 | 36 | 13154 | 95 |